package com.edu.ocrdemo.config;

import java.util.logging.Logger;
import net.sourceforge.tess4j.Tesseract;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration that exposes a pre-configured Tess4J {@link Tesseract} bean.
 *
 * <p>The trained-data directory is read from the {@code tess4j.datapath} property;
 * the language, page segmentation mode and OCR engine mode are fixed in this class.
 *
 * <p>Created 2024/12/31 16:33.
 *
 * @author tfan
 */
@Configuration
public class TesseractOcrConfiguration {
    private static final Logger LOGGER =
            Logger.getLogger(TesseractOcrConfiguration.class.getName());

    /** Recognition languages: Simplified Chinese plus English. */
    private static final String LANGUAGES = "chi_sim+eng";

    /** Page segmentation mode 1 (PSM_AUTO_OSD). */
    private static final int PAGE_SEG_MODE_AUTO_OSD = 1;

    /**
     * OCR engine mode passed to {@link Tesseract#setOcrEngineMode(int)}.
     *
     * <p>NOTE(review): on Tesseract 4+ the value 2 presumably selects the combined
     * legacy + LSTM engine, which needs traineddata files containing both models —
     * confirm against the deployed tessdata and Tess4J version.
     */
    private static final int OCR_ENGINE_MODE = 2;

    /** Directory holding the Tesseract trained-data files, injected from {@code tess4j.datapath}. */
    @Value("${tess4j.datapath}")
    private String dataPath;

    /**
     * Builds the {@link Tesseract} instance used for OCR.
     *
     * <p>NOTE(review): no scope is declared on this bean, so one instance is presumably
     * shared by all injection points — confirm that sharing it across concurrent
     * callers is safe for the Tess4J version in use.
     *
     * @return a {@link Tesseract} configured with the data path, languages,
     *         page segmentation mode and OCR engine mode defined in this class
     */
    @Bean
    public Tesseract tesseract() {
        // Report the resolved data path through the logging framework rather than stdout,
        // so the message follows the application's log configuration.
        LOGGER.info("tess4j.datapath: " + dataPath);

        Tesseract tesseract = new Tesseract();
        // Location of the trained-data folder.
        tesseract.setDatapath(dataPath);
        tesseract.setLanguage(LANGUAGES);
        tesseract.setPageSegMode(PAGE_SEG_MODE_AUTO_OSD);
        // Optional tuning through setTessVariable (e.g. "textord_min_linesize",
        // "tessedit_char_whitelist") is intentionally left disabled.
        tesseract.setOcrEngineMode(OCR_ENGINE_MODE);
        return tesseract;
    }
}
